# -*- mode: snippet -*-
# name: mice from mice Multivariate Imputation By Chained Equations
# key: mice.mice.template
# --
cat("
###
### Missing data imputation with mice
################################################################################\n")

## Column names of the dataset, in their original order
all_vars <- names($1)

## Columns that contain at least one missing value
miss_vars <- all_vars[colSums(is.na($1)) > 0]

## Blank predictorMatrix template: all zeros, one row and one column per
## variable, labelled with the variable names on both dimensions.
##
## Conventions of the mice predictorMatrix argument:
##  - square matrix of size ncol(data) holding 0/1 codes;
##  - rows are target variables (to be imputed), columns are predictors,
##    both in the order the variables appear in the data;
##  - 1 means the column variable is used to predict the row variable;
##  - the diagonal must be zero;
##  - two-level imputation additionally allows the codes 2 and -2;
##  - the mice default uses all other columns as predictors
##    (sometimes called massive imputation).
##
predictorMatrix <- matrix(0,
                          nrow = length(all_vars), ncol = length(all_vars),
                          dimnames = list(all_vars, all_vars))

## Avoid perfect linear dependence
## http://stats.stackexchange.com/questions/127104/r-mice-imputation-failing
## Too many weights issue in nnet called by mice
## http://stackoverflow.com/questions/28551633/error-in-r-mice-package-too-many-weights


cat("
###  Specify Variables informing imputation\n")
## Predictors may be complete variables or variables with missingness;
## the latter will have to be imputed as well. List them explicitly.
## Duplicates and names that do not exist in the dataset are dropped.
imputer_vars <- intersect(unique(c($2)), all_vars)
imputer_vars
## Copy the all-zero template and flag every predictor column with 1
imputer_matrix <- predictorMatrix
imputer_matrix[, imputer_vars] <- 1


cat("
###  Specify variables with missingness to be imputed \n")
## Optional extra variables that should be imputed without serving as
## predictors for other variables.
imputed_only_vars <- c($3)
## Imputers that have missingness must be imputed too, so they are added
## here; only variables that actually have missing values are kept.
imputed_vars <- intersect(unique(c(imputed_only_vars, imputer_vars)), miss_vars)
imputed_vars
## Copy the predictorMatrix template and flag the rows of imputed variables
imputed_matrix <- predictorMatrix
imputed_matrix[imputed_vars, ] <- 1

## Cluster (class) variable: its column in the imputer matrix is coded -2,
## overriding any 1 previously set for that column.
## The assignment is guarded so that a dataset without this column gets a
## notice instead of a "subscript out of bounds" error.
cluster_var <- "id"
if (cluster_var %in% all_vars) {
    imputer_matrix[, cluster_var] <- -2
} else {
    message("Cluster variable '", cluster_var,
            "' is not in the dataset; no -2 code was set in the predictor matrix")
}

cat("
###  Construct a full predictor matrix (rows: imputed variables; cols: imputer variables)\n")
## Element-wise product: a cell stays non-zero only when its row belongs to
## an imputed variable AND its column belongs to an imputer variable
predictorMatrix <- imputed_matrix * imputer_matrix
## A variable cannot impute itself, so the diagonal is cleared
diag(predictorMatrix) <- 0
predictorMatrix


cat("
###  Dry-run mice for imputation methods\n")
## maxit = 0: set up the imputation model only; the returned object is used
## for the predictorMatrix (and methods) that mice settled on.
dry_mice <- mice(data = $1, m = 1, predictorMatrix = predictorMatrix, maxit = 0)
## Update predictor matrix
predictorMatrix <- dry_mice$predictorMatrix

## Flag rows/columns holding at least one NON-ZERO code. Summing the raw
## codes would be wrong: the matrix may contain negative codes (e.g. -2 for a
## cluster variable), so the sum of a row or column that is in use can be
## zero or negative.
is_imputer_col <- colSums(predictorMatrix != 0) > 0
is_imputed_row <- rowSums(predictorMatrix != 0) > 0

cat("###   Imputers (non-zero columns of predictorMatrix)\n")
imputer_vars <- colnames(predictorMatrix)[is_imputer_col]
imputer_vars
cat("###   Imputed (non-zero rows of predictorMatrix)\n")
imputed_vars <- rownames(predictorMatrix)[is_imputed_row]
imputed_vars
cat("###   Imputers that are complete\n")
setdiff(imputer_vars, imputed_vars)
cat("###   Imputers with missingness\n")
intersect(imputer_vars, imputed_vars)
cat("###   Imputed-only variables without being imputers\n")
setdiff(imputed_vars, imputer_vars)
cat("###   Variables with missingness that are not imputed\n")
setdiff(miss_vars, imputed_vars)
cat("###   Relevant part of predictorMatrix\n")
## drop = FALSE keeps the matrix shape (and labels) for a single row/column
predictorMatrix[is_imputed_row, is_imputer_col, drop = FALSE]


## Blank out the imputation method to really exclude variables
## http://www.stefvanbuuren.nl/publications/MICE%20in%20R%20-%20Draft.pdf
##
## MICE will automatically skip imputation of variables that are complete.
## One of the problems in previous versions of MICE was that all incomplete
## data needed to be imputed. In MICE 2.0 it is possible to skip imputation
## of selected incomplete variables by specifying the empty method "".
## This works as long as the incomplete variable that is skipped is not being
## used as a predictor for imputing other variables.
## Note: putting zeros in the predictorMatrix alone is NOT enough!
##
## Every variable that is not planned for imputation gets the empty method
dry_mice$method[setdiff(all_vars, imputed_vars)] <- ""
cat("###   Methods used for imputation\n")
## Show only the variables that kept a non-empty method
dry_mice$method[nzchar(dry_mice$method)]


cat("
###  Run mice\n")
## Number of imputed datasets; each one is produced by a separate task
M <- 4
cat("###   Imputing", M, "times\n")

## Set seed for reproducibility
set.seed(3561126)

## Parallelized execution: each task runs mice with m = 1 and the single
## imputations are merged into one object with ibind.
## .packages = "mice" loads mice on every worker, so that mice() is found on
## backends whose workers do not inherit the master session's packages.
miceout <- foreach(i = seq_len(M), .combine = ibind,
                   .packages = "mice") %dorng% {
    cat("## Started iteration", i, "\n")
    ## printFlag is spelled out instead of relying on partial matching of
    ## 'print'; MaxNWts is not a named mice argument here and is forwarded
    ## through '...'.
    mice_i <- mice(data = $1, m = 1, printFlag = TRUE,
                   predictorMatrix = predictorMatrix, method = dry_mice$method,
                   MaxNWts = 2000)
    cat("## Completed iteration", i, "\n")
    ## Make sure to return the output
    mice_i
}


cat("
###  Show mice results\n")
## The mice object itself
miceout
## Variables that no longer have missingness after imputation
cat("###   Variables actually imputed\n")
## First completed dataset and the variables in it that still contain NA
completed_first <- complete(miceout, action = 1)
still_miss_vars <- names(completed_first)[colSums(is.na(completed_first)) > 0]
## Variables with NA in the original data minus those still incomplete
orig_miss_vars <- names($1)[colSums(is.na($1)) > 0]
actually_imputed_vars <- setdiff(orig_miss_vars, still_miss_vars)
actually_imputed_vars

## Compare the plan with what actually happened
cat("###   Variables that were unexpectedly imputed\n")
setdiff(actually_imputed_vars, imputed_vars)
cat("###   Variables that were planned for MI but not imputed\n")
setdiff(imputed_vars, actually_imputed_vars)

## Variables that remain incomplete after imputation
cat("###   Variables still having missing values\n")
still_miss_vars
$0